
library(dplyr)
library(ggplot2)
library(ggsci)
library(cowplot)


# Load the aggregated results table (wide, not scaled, per the file name).
df_in <- read.csv("data/pred_res_nested/agg_wide_not_scaled.csv")

#
# Prepare data
#
# Keep the classifier name and its mean accuracy (renamed to `acc`), derive the
# lower/upper bounds as acc -/+ `acc_ci`, and sort rows by ascending accuracy.
# NOTE(review): `acc_ci` is presumably a confidence-interval half-width --
# confirm against the script that produced the CSV.
df <- df_in %>%
  select(clf_name, acc = acc_mean, acc_ci) %>%
  mutate(
    acc_lb = acc - acc_ci,
    acc_ub = acc + acc_ci
  ) %>%
  select(-acc_ci) %>%
  arrange(acc)

#
# Plot
#
# Accuracy of the `dummy_most_freq` classifier; used below as the dashed
# reference line in the plot.
baseline <- df[df$clf_name == "dummy_most_freq", "acc"]
if (length(baseline) != 1L) {
  stop(
    "Expected exactly one 'dummy_most_freq' row, found ", length(baseline),
    call. = FALSE
  )
}

# The dummy classifier is shown as a reference line, not as a bar.
df <- df %>% filter(clf_name != "dummy_most_freq")

# Display names, listed in the row order of `df` (ascending accuracy, from the
# earlier `arrange(acc)`).
# NOTE(review): this pairing is positional, not keyed on `clf_name`. If the
# accuracy ranking in the CSV changes, labels end up on the wrong bars --
# verify against `df$clf_name`, and ideally replace with a named lookup.
clf_labels <- c(
  "Naive Bayes",
  "Linear SVM",
  "Logistic Regression",
  "Gradient Boosted Trees",
  "Random Forest"
)

# Fail fast on a row-count mismatch: `$<-` on a data.frame would otherwise
# silently recycle the labels when nrow(df) is a multiple of their length.
if (nrow(df) != length(clf_labels)) {
  stop(
    "Expected ", length(clf_labels), " classifiers after removing the ",
    "baseline, found ", nrow(df),
    call. = FALSE
  )
}
df$clf <- clf_labels

# Fix the factor levels to the reversed row order so the plot keeps the
# accuracy ordering instead of sorting the labels alphabetically.
df$clf <- factor(df$clf, levels = rev(df$clf))

# Third colour of a 4-colour AAAS palette (ggsci), fully opaque.
col <- pal_aaas(palette = c("default"), alpha = 1)(4)[3]


# Horizontal bar chart of classifier accuracy: one bar per classifier with its
# accuracy printed inside the bar, error bars spanning acc_lb..acc_ub, and a
# dashed line at the baseline accuracy.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for line
# geoms; `size` is kept here so the script still runs on older ggplot2.
plt <- ggplot(df, aes(x = clf, y = acc)) +
  geom_col(width = 0.7, fill = col) +
  # Format with sprintf so every label shows two decimals ("0.90", not "0.9");
  # round() alone drops trailing zeros once the value is turned into text.
  geom_text(
    aes(label = sprintf("%.2f", acc)),
    size = 5,
    position = position_stack(vjust = 0.81),
    colour = "white"
  ) +
  geom_errorbar(
    aes(ymin = acc_lb, ymax = acc_ub),
    position = position_dodge(.9),
    width = .25,
    size = 0.5,
    alpha = 0.8
  ) +
  geom_hline(
    yintercept = baseline,
    color = "black",
    size = 0.4,
    linetype = "dashed"
  ) +
  # Flip so classifier names read horizontally along the vertical axis.
  coord_flip() +
  xlab("") +
  ylab("Accuracy") +
  theme_bw() +
  theme(
    text = element_text(family = "Roboto"),
    axis.title.x = element_text(size = 13),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 13, color = "black")
  )

# Show the figure on the active graphics device.
print(plt)

# Write the figure to PDF through the Cairo device. The output directory is
# created first so that a fresh checkout does not fail on a missing folder.
out_file <- "plots/pdfs/classifiers_bar_plot.pdf"
dir.create(dirname(out_file), recursive = TRUE, showWarnings = FALSE)

# NOTE(review): newer cowplot releases name the aspect-ratio argument
# `base_asp`; `base_aspect_ratio` is kept to match the cowplot version this
# script was written against -- confirm against the installed version.
save_plot(
  out_file,
  plt,
  device = cairo_pdf,
  base_height = 3.5,
  base_aspect_ratio = 2.2
)

# END